---
title: "Country Patents Vis"
output: html_document
date: "2024-03-27"
---

```{r setup, include=FALSE}
# Echo the source of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# The former `rm(list = ls())` and `cat("\014")` calls were removed on purpose:
# clearing the workspace destroys the caller's objects when the document is
# rendered from an interactive session, and a form-feed written from a hidden
# chunk has no effect on the rendered output.
```


```{r}
# Packages attached for the whole document. Among them, tidyverse supplies the
# dplyr/ggplot2 verbs used below, vroom reads the auxiliary CSV files,
# ggthemes provides theme_clean(), and RColorBrewer provides brewer.pal().
library(tidyverse)
library(vroom)
library(mapview)
library(maps)
library(ggmap)
library(usmap)
library(ggspatial)
library(ggthemes)
library(DescTools)
library(haven)
library(RColorBrewer)
library(stargazer)
# Large scipen penalty: print numbers in fixed rather than scientific notation.
options(scipen = 999)
```


```{r}
# Alternative input file, kept for reference:
# data <- read.csv("y02.country.full.csv")

# Read the input CSV, then derive patent_year as the first four characters of
# patent_date. The result stays a character string at this point.
data <- read.csv(file = "country.y02.csv")
data[["patent_year"]] <- substr(data[["patent_date"]], start = 1, stop = 4)

```

```{r}
# Column names noted by the author (not referenced by the code below):
#   inventor_disambig_country
#   assignee_disambig_country

# Annual patent counts per country bucket. US, DE, JP and CN keep their own
# code; every other non-missing country code is pooled into "RoW". A patent_id
# that occurs on several rows is counted once, under the bucket of the first
# row on which it appears in `data`.
annual.data <- data %>%
  filter(!is.na(disambig_country)) %>%
  mutate(country = case_when(disambig_country == "US" ~ "US",
                             disambig_country == "DE" ~ "DE",
                             disambig_country == "JP" ~ "JP",
                             disambig_country == "CN" ~ "CN",
                             TRUE ~ "RoW")) %>%
  distinct(patent_id, .keep_all = TRUE) %>%
  group_by(country, patent_year) %>%
  # Rows are already unique per patent_id, so n() is the distinct count.
  # Drop the grouping so later steps do not inherit a stale group structure.
  summarise(patent_count = n(), .groups = "drop")
```


```{r}
library(ggrepel)
library(MASS)
library(RColorBrewer)

# Use the ColorBrewer "Set1" palette instead of ggplot's default colours.
colors <- brewer.pal(n = 6, "Set1")

red <- colors[1]
blue <- colors[2]
green <- colors[3]
purple <- colors[4]
orange <- colors[5]
yellow <- colors[6]

# One colour per plotted country. The vector is named so the mapping no longer
# depends on the alphabetical order of the country codes; the assignments are
# the ones the earlier unnamed vector produced (CN, DE, JP, US in that order).
custom_colors <- c("CN" = orange, "DE" = green, "JP" = purple, "US" = blue)

# patent_year is derived with substr() and is therefore character; convert it
# so it can be filtered and plotted on a continuous axis.
annual.data$patent_year <- as.numeric(annual.data$patent_year)

# Line chart of annual patent counts for the four named countries ("RoW" is
# excluded). The legend is hidden; lines are labelled directly on the panel.
presentation_plot <- annual.data %>%
  filter(patent_year > 1980, patent_year < 2025, country != "RoW") %>%
  ggplot(aes(x = patent_year, y = patent_count, group = country,
             color = country, linetype = country)) +
  # `linewidth` replaces the deprecated `size` argument for line geoms.
  geom_line(linewidth = 1.1) +
  scale_color_manual(values = custom_colors) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(breaks = c(1976, 1985, 1995, 2005, 2015, 2024)) +
  theme_clean() +
  # The caption font is set in theme() below; labs() only takes label text.
  labs(x = "", y = "",
       title = "Annual Y02 Patent Grants at USPTO by Select Country",
       caption = "Data Source: USPTO; Author's Illustration") +
  theme(legend.position = "none",
        plot.title = element_text(family = "serif", size = 13, face = "bold"),
        plot.caption = element_text(family = "serif", size = 10, hjust = 1)) +
  # Direct labels; DE sits further left (x = 2015) than the other three.
  annotate("text",
           x = c(2019, 2019, 2019, 2015),
           y = c(750, 12000, 4700, 2600),
           label = c("CN", "US", "JP", "DE"),
           size = 5, fontface = "bold", family = "serif")

presentation_plot

width <- 8

# Golden-ratio aspect; pass the plot explicitly instead of relying on
# ggplot2's "last plot" state.
ggsave("Y02.USPTO.ctry.presentation.png", plot = presentation_plot,
       width = width, height = width / 1.618, units = "in", dpi = 700)
```



```{r}

# Colour lookup keyed by country code (the "Other" entry has no matching
# level in the data plotted below).
custom_colors <- c(US = blue, DE = orange, JP = green, CN = red, Other = purple)

# Same line chart as a plain "book" variant: default line width, no title or
# caption, smaller in-panel country labels.
annual.data %>%
  filter(patent_year > 1980, patent_year < 2025, country != "RoW") %>%
  ggplot(aes(x = patent_year, y = patent_count, group = country,
             color = country, linetype = country)) +
  geom_line() +
  scale_color_manual(values = custom_colors) +
  theme_clean() +
  labs(x = "", y = "") +
  theme(legend.position = "none",
        plot.title = element_text(family = "serif", size = 13, face = "bold"),
        plot.caption = element_text(family = "serif", size = 10, hjust = 1)) +
  # Serif country labels placed next to the right-hand end of each line.
  annotate("text", x = 2019,
           y = c(750, 12500, 5000, 2600),
           label = c("CN", "US", "JP", "DE"),
           size = 3.5, family = "serif")

width <- 8

# Saves the most recently created plot at a golden-ratio aspect.
ggsave("Y02.USPTO.ctry.book.png", width = width, height = width / 1.618,
       units = "in", dpi = 700)

```





```{r}
# Rebuild the annual counts with only two buckets: "US" and everything else
# with a non-missing country code ("RoW"). Each patent_id is counted once,
# under the bucket of the first row on which it appears in `data`.
annual.data <- data %>%
  filter(!is.na(disambig_country)) %>%
  mutate(country = if_else(disambig_country == "US", "US", "RoW")) %>%
  distinct(patent_id, .keep_all = TRUE) %>%
  group_by(country, patent_year) %>%
  # Rows are unique per patent_id here, so n() equals the distinct count;
  # drop the grouping so it does not leak into later chunks.
  summarise(patent_count = n(), .groups = "drop")
```

```{r}
# Print the working directory; the relative file names used in this document
# for reading data and saving figures are resolved against it.
getwd()
```




```{r}
# Per-year shares: `total` is the sum of patent_count over all country buckets
# in a year, and `percent` is each bucket's fraction of that total (0-1).
annual.data <- annual.data %>%
  group_by(patent_year) %>%
  mutate(total = sum(patent_count),
         percent = patent_count / total) %>%
  ungroup()

# brewer.pal() supports n >= 3 only; asking for 2 emits a warning and returns
# three colours anyway. Request three and use the first two.
colors <- brewer.pal(n = 3, "Set1")

red <- colors[1]
blue <- colors[2]

# Two-colour vector (blue first, then red).
custom_colors <- c(blue, red)

```



```{r}
# Read the yearly table and keep the year and foreign-origin share columns.
# Short-named numeric copies are added next to the original columns, plus
# us_grant_share = (100 - foreign share) / 100, i.e. a 0-1 fraction.
all <- vroom("US patent data .csv") %>%
  dplyr::select(
    `Year of Application or Grant`,
    `Total Patent Grants, Foreign Origin Percent Share *`
  ) %>%
  mutate(
    foreign_grant_share =
      as.numeric(`Total Patent Grants, Foreign Origin Percent Share *`),
    year = as.numeric(`Year of Application or Grant`),
    us_grant_share = (100 - foreign_grant_share) * 0.01
  )
```



```{r}


# Add a numeric `year` key derived from patent_year, then attach the columns
# of `all` to every matching year.
annual.data <- annual.data %>%
  mutate(year = as.numeric(patent_year)) %>%
  left_join(all, by = "year")




```



*Semiconductor patents (H01L), read from `H01L.semiconductor.patents.csv`*

```{r}
# Semiconductor file: copy patent_year into `year` and keep only the join
# keys plus the percent.semic.pats column.
semis <- vroom("H01L.semiconductor.patents.csv") %>%
  mutate(year = patent_year) %>%
  dplyr::select(country, year, percent.semic.pats)

# Start the combined table: annual counts plus the semiconductor column,
# matched on country and year.
test <- annual.data %>%
  left_join(semis, by = c("country", "year"))

```

```{r}
# Biotech file: copy patent_year into `year` and keep only the join keys plus
# the percent.biotech.patents column.
biot <- vroom("percent.biotech.patents.csv") %>%
  mutate(year = patent_year) %>%
  dplyr::select(country, year, percent.biotech.patents)

# Append the biotech column to the combined table by country and year.
test <- test %>%
  left_join(biot, by = c("country", "year"))
```


*Solar and Batteries*

```{r}
# Solar file: copy patent_year into `year` and keep only the join keys plus
# the percent.solar.patents column.
solar <- vroom("percent.solar.patents.csv") %>%
  mutate(year = patent_year) %>%
  dplyr::select(country, year, percent.solar.patents)

# Append the solar column to the combined table by country and year.
test <- test %>%
  left_join(solar, by = c("country", "year"))
```





```{r}
# Draft comparison for the "US" rows: the three technology-specific percent
# columns against the overall us_grant_share, one line each, no legend.
test %>%
  # Filter on the numeric `year` column: `patent_year` is still a character
  # string in `test`, so comparing it with numbers was a string comparison.
  filter(country == "US", year > 1976, year < 2021) %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = percent.solar.patents, color = "percent.solar.patents")) +
  geom_line(aes(y = us_grant_share, color = "us_grant_share_line")) +
  geom_line(aes(y = percent.semic.pats, color = "percent.semic.pats")) +
  geom_line(aes(y = percent.biotech.patents,
                color = "percent.biotech.patents")) +
  # The colour aesthetic holds constant keys; map each key to its line colour.
  scale_color_manual(values = c("percent.solar.patents" = "green",
                                "us_grant_share_line" = "black",
                                "percent.semic.pats" = "red",
                                "percent.biotech.patents" = "blue")) +
  theme_classic() +
  xlab("") +
  ylab("") +
  theme(legend.position = "none")
```

```{r}
library(dplyr)
library(ggplot2)
library(scales) # Needed for the percent format

# Same comparison for the "US" rows as a percent-axis version: the y values
# are fractions that percent_format() renders as percentages.
test %>%
  # Filter on the numeric `year` column: `patent_year` is still a character
  # string in `test`, so comparing it with numbers was a string comparison.
  filter(country == "US", year > 1976, year < 2021) %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = percent.solar.patents, color = "percent.solar.patents")) +
  geom_line(aes(y = us_grant_share, color = "us_grant_share_line")) +
  geom_line(aes(y = percent.semic.pats, color = "percent.semic.pats")) +
  geom_line(aes(y = percent.biotech.patents,
                color = "percent.biotech.patents")) +
  # The colour aesthetic holds constant keys; map each key to its line colour.
  scale_color_manual(values = c("percent.solar.patents" = "green",
                                "us_grant_share_line" = "black",
                                "percent.semic.pats" = "red",
                                "percent.biotech.patents" = "blue")) +
  scale_y_continuous(labels = percent_format()) +
  theme_classic() +
  xlab("") +
  ylab("Percentage Patents Granted to US Inventors") +
  theme(legend.position = "none")

```


```{r}

# Final figure for the "US" rows: three technology-specific percent columns
# against the overall us_grant_share, with serif text and in-panel labels
# instead of a legend. Saved as "Percent.Patents.png".
percent_plot <- test %>%
  # Filter on the numeric `year` column: `patent_year` is still a character
  # string in `test`, so comparing it with numbers was a string comparison.
  filter(country == "US", year > 1976, year < 2021) %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = percent.solar.patents, color = "percent.solar.patents")) +
  geom_line(aes(y = us_grant_share, color = "us_grant_share_line")) +
  geom_line(aes(y = percent.semic.pats, color = "percent.semic.pats")) +
  geom_line(aes(y = percent.biotech.patents,
                color = "percent.biotech.patents")) +
  scale_color_manual(values = c(
    "percent.solar.patents" = "#006400",  # Dark green
    "us_grant_share_line" = "black",
    "percent.semic.pats" = "red",
    "percent.biotech.patents" = "blue"
  )) +
  # Namespace-qualified so this chunk does not depend on library(scales)
  # having been attached by an earlier chunk.
  scale_y_continuous(labels = scales::percent_format()) +
  theme_clean() +
  xlab("") +
  ylab("% Patents Granted to US Inventors") +
  theme(legend.position = "none",
        text = element_text(family = "serif"),
        axis.title = element_text(family = "serif"),
        axis.text = element_text(family = "serif")) +
  # Direct line labels, coloured to match their lines and left-aligned at x.
  annotate("text", x = 1982, y = .84, label = "Solar and Batteries",
           color = "#006400", hjust = 0, family = "serif") +
  annotate("text", x = 2001, y = .58, label = "Average",
           color = "black", hjust = 0, family = "serif") +
  annotate("text", x = 2012, y = .37, label = "Semiconductors",
           color = "red", hjust = 0, family = "serif") +
  annotate("text", x = 2014, y = 0.65, label = "Biotechnology",
           color = "blue", hjust = 0, family = "serif")

percent_plot

width <- 8

# Golden-ratio aspect; pass the plot explicitly instead of relying on
# ggplot2's "last plot" state.
ggsave("Percent.Patents.png", plot = percent_plot,
       width = width, height = width / 1.618, units = "in", dpi = 500)

```



